Data

Basic descriptives of overall activity

# PER INFLUENCER
# Derive the profile handle from each tweet URL: keep the first path segment
# that follows ".com/". URLs that do not match the pattern are left unchanged.
# The result is converted to a data.table.
tw <- as.data.table(
  mutate(tw, PROFILE = gsub("^.*\\.com/([^/]+).*", "\\1", URL))
)

# Most active profiles: number of rows in `tw` per PROFILE, largest first,
# limited to the first 1000 profiles and shown as a scrollable table.
tw[, .N, by = PROFILE][order(-N)] %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most popular: mean FOLLOWERS_COUNT per profile, top 1000
group_by(tw, PROFILE) %>%
  summarise(FOLLOW = mean(FOLLOWERS_COUNT)) %>%
  arrange(desc(FOLLOW)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential: total REACH per profile, top 1000
group_by(tw, PROFILE) %>%
  summarise(REACH = sum(REACH)) %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential II: total INTERACTIONS per profile, top 1000
group_by(tw, PROFILE) %>%
  summarise(INTERACTIONS = sum(INTERACTIONS)) %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most appreciated (favorites): total FAVORITE_COUNT per profile, top 1000
group_by(tw, PROFILE) %>%
  summarise(FAVORITE = sum(FAVORITE_COUNT)) %>%
  arrange(desc(FAVORITE)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most appreciated (retweets): total RETWEET_COUNT per profile, top 1000
group_by(tw, PROFILE) %>%
  summarise(RETWEET = sum(RETWEET_COUNT)) %>%
  arrange(desc(RETWEET)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# PER TWEET
# Each table keeps the profile, the tweet text, one metric and the URL,
# sorts by that metric in descending order and shows the first 1000 rows.

# Most popular: by FOLLOWERS_COUNT
select(tw, PROFILE, FULL_TEXT, FOLLOWERS_COUNT, URL) %>%
  arrange(desc(FOLLOWERS_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential: by REACH
select(tw, PROFILE, FULL_TEXT, REACH, URL) %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential II: by INTERACTIONS
select(tw, PROFILE, FULL_TEXT, INTERACTIONS, URL) %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most appreciated (favorites): by FAVORITE_COUNT
select(tw, PROFILE, FULL_TEXT, FAVORITE_COUNT, URL) %>%
  arrange(desc(FAVORITE_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most appreciated (retweets): by RETWEET_COUNT
select(tw, PROFILE, FULL_TEXT, RETWEET_COUNT, URL) %>%
  arrange(desc(RETWEET_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

Check Twitter activity on the CRO supply side

# Select relevant CRO profiles: values of FROM that occur in more than 5 rows
# of `tw`, ordered from the most to the least frequent.
CRO_TW <- tw[, .N, by = FROM][order(-N)][N > 5, FROM]

# Keep only the rows whose FROM is in that set, then derive the profile handle
# from the tweet URL (first path segment after ".com/").
CTW <- tw[FROM %in% CRO_TW] %>%
  mutate(PROFILE = gsub("^.*\\.com/([^/]+).*", "\\1", URL))

# Most popular: mean FOLLOWERS_COUNT per profile, top 1000
group_by(CTW, PROFILE) %>%
  summarise(FOLLOW = mean(FOLLOWERS_COUNT)) %>%
  arrange(desc(FOLLOW)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential: total REACH per profile, top 1000
group_by(CTW, PROFILE) %>%
  summarise(REACH = sum(REACH)) %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential II: total INTERACTIONS per profile, top 1000
group_by(CTW, PROFILE) %>%
  summarise(INTERACTIONS = sum(INTERACTIONS)) %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most appreciated (favorites): total FAVORITE_COUNT per profile, top 1000
group_by(CTW, PROFILE) %>%
  summarise(FAVORITE = sum(FAVORITE_COUNT)) %>%
  arrange(desc(FAVORITE)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most appreciated (retweets): total RETWEET_COUNT per profile, top 1000
group_by(CTW, PROFILE) %>%
  summarise(RETWEET = sum(RETWEET_COUNT)) %>%
  arrange(desc(RETWEET)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# PER TWEET
# Each table keeps the profile, the tweet text, one metric and the URL,
# sorts by that metric in descending order and shows the first 1000 rows.

# Most popular: by FOLLOWERS_COUNT
select(CTW, PROFILE, FULL_TEXT, FOLLOWERS_COUNT, URL) %>%
  arrange(desc(FOLLOWERS_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential: by REACH
select(CTW, PROFILE, FULL_TEXT, REACH, URL) %>%
  arrange(desc(REACH)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most influential II: by INTERACTIONS
select(CTW, PROFILE, FULL_TEXT, INTERACTIONS, URL) %>%
  arrange(desc(INTERACTIONS)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most appreciated (favorites): by FAVORITE_COUNT
select(CTW, PROFILE, FULL_TEXT, FAVORITE_COUNT, URL) %>%
  arrange(desc(FAVORITE_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Most appreciated (retweets): by RETWEET_COUNT
select(CTW, PROFILE, FULL_TEXT, RETWEET_COUNT, URL) %>%
  arrange(desc(RETWEET_COUNT)) %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))

Check forum activity

##                word sentiment brija
##  1:         mladost  0.324070   NEG
##  2:          matkun  0.080595   POZ
##  3:           brend  0.302140   NEG
##  4:        povećalo  0.458300   POZ
##  5:          radnik  0.270550   POZ
##  6:            pakt  0.483980   POZ
##  7:    propitkivati  0.386890   POZ
##  8:        stignuti  0.558510   NEG
##  9:         danteov  0.344940   NEG
## 10:     pripovjedan  0.394750   POZ
## 11: nepromjenjivost  0.124840   NEG
## 12:       ispuštati  0.504460   NEG
## 13:      elektorski  0.042709   NEG
## 14:             aco  0.532640   NEG
## 15:          korpus  0.518950   POZ
# Read in data: convert `forum` to a data.table
forum <- as.data.table(forum)

# Number of rows per TITLE, largest first, first 1000 titles as a table
forum[, .N, by = TITLE][order(-N)] %>%
  head(1000) %>%
  datatable(options = list(scrollX = TRUE, scrollY = "500px"))
# Tokenize FULL_TEXT of one selected title into a `word` column
# (one token per row)
ZM_token <- unnest_tokens(
  forum[TITLE == "Zoran Milanović, predsjednik Republike Hrvatske vol. IV"],
  word,
  FULL_TEXT
)
  
  

# Clean the tokens: remove stop words, tokens containing digits, and
# single-letter tokens.
# Rows are dropped directly instead of first being overwritten with NA and
# filtered afterwards. The single-letter test uses the POSIX class [[:alpha:]]
# rather than [a-zA-Z], so that non-ASCII letters (e.g. č, ć, š, ž, đ) are
# treated as letters too; the class is interpreted according to the locale.
ZM_tokenTidy <- ZM_token %>%
  anti_join(stop_corpus, by = "word") %>%
  filter(
    !is.na(word),                    # grepl() returns FALSE for NA input
    !grepl("\\d", word),             # any digit anywhere in the token
    !grepl("^[[:alpha:]]$", word)    # token is exactly one letter
  )

# Token frequencies, most frequent first
ZM_tokenTidy[, .N, by = word][order(-N), ]
##               word    N
##     1:       quote 3264
##     2:   milanović 1643
##     3:         hdz 1274
##     4:        onda  838
##     5: predsjednik  811
##    ---                 
## 38457:       sišao    1
## 38458:    oblacima    1
## 38459:      smotre    1
## 38460:   rasipanje    1
## 38461:    hašomana    1
## Visualize most common words
# Horizontal bar chart of the words that occur more than 500 times;
# `word` is reordered by its count so the bars are sorted by frequency.
ZM_tokenTidy[, .N, by = word][N > 500][order(-N)] %>%
  mutate(word = reorder(word, N)) %>%
  ggplot(aes(x = word, y = N)) +
  geom_col() +
  coord_flip() +
  labs(x = NULL) +
  theme_economist()